In [1]:
    
# IPython magic: enable matplotlib's interactive support. No backend name is
# given, so IPython picks the default GUI backend for this environment.
%matplotlib
    
    
In [2]:
    
import sys
import pandas as pa
import numpy as np
from PrimeText import PrimeText
import matplotlib.pyplot as plt
    
In [3]:
    
# Text-processing helper shared by the cells below.
pt = PrimeText()

# Read the comment dump (the file is decoded as ISO-8859-1) and keep only the
# 'comment' column for processing.
ytData = pa.read_csv(
    "utubeES.csv",
    encoding="ISO-8859-1",
)
comments = ytData["comment"]
    
In [9]:
    
# Run the PrimeText preparation steps on the loaded comments, in this order.
# NOTE(review): PrimeText is defined outside this notebook; the descriptions
# below are inferred from the method names only -- confirm against its source.
pt.cleanData(comments)      # presumably normalises the raw comment text
pt.assembleDictionary()     # presumably collects the vocabulary
pt.indexDictionary()        # presumably fills pt.indexedDictionary (read in a later cell)
pt.indexComments()          # presumably encodes each comment for countInRecords
    
    
In [12]:
    
# For every entry of the indexed dictionary, ask PrimeText for the count of
# that single term. One pass collects (term, count) pairs so the terms and
# their counts stay aligned and in dictionary order.
term_counts = [
    (term, pt.countInRecords([term]))
    for term, _ in pt.indexedDictionary.items()
]
keyText = [term for term, _ in term_counts]
keyCount = [count for _, count in term_counts]

# Rank the terms by count (largest first) and keep the first 50 for plotting.
s1 = pa.Series(keyCount, index=keyText)
ranked = s1.sort_values(ascending=False)
sortedS1 = ranked[:50]
sortedS1.plot.bar()
    
    Out[12]:
In [6]:
    
# Square table of zeros whose rows and columns are both labelled with the
# terms in sortedS1.index; a later cell fills in one value per (row, col) pair.
# The frame is built directly from the scalar 0 so it has an integer dtype
# from the start, rather than creating an all-NaN object-dtype frame and
# relying on fillna(0) to downcast it (deprecated in recent pandas, and an
# object-dtype frame cannot be rendered by plt.imshow).
df = pa.DataFrame(0, index=sortedS1.index, columns=sortedS1.index)
    
In [7]:
    
# Fill df with pt.countInRecords([col, row]) for every ordered pair of the
# selected terms, printing a single-line progress counter per finished column.
# NOTE(review): countInRecords presumably returns how many records contain
# all the given terms -- confirm against PrimeText.
names = sortedS1.index
colsdone = 0  # stays 0 (and defined) when there are no terms at all
for colsdone, col in enumerate(names, start=1):
    # "\r" rewinds to the start of the line so the counter overwrites itself;
    # flush so the progress is visible while the loop is still running.
    sys.stdout.write("\rCols done : %i" % colsdone)
    sys.stdout.flush()
    for row in names:
        # Single label-based assignment on df itself. The chained form
        # df[col][row] = ... may write to a temporary object and is silently
        # ignored when pandas Copy-on-Write is active.
        df.loc[row, col] = pt.countInRecords([col, row])
sys.stdout.write("\n")
sys.stdout.flush()
    
    
In [11]:
    
# Draw the pairwise table as a heat map, one cell per (row, column) entry of df.
imgplot = plt.imshow(df, interpolation="nearest")

# Label both axes with the term names; x labels are turned vertical.
tick_positions = range(len(names))
plt.xticks(tick_positions, names, rotation=90)
plt.yticks(tick_positions, names, rotation=0)

plt.colorbar()
plt.show()
    
In [ ]:
    
    
In [ ]: